Package org.terrier.structures.indexing.singlepass.hadoop

Source Code of org.terrier.structures.indexing.singlepass.hadoop.TestSplitEmittedTerm

/*
* Terrier - Terabyte Retriever
* Webpage: http://terrier.org
* Contact: terrier{a.}dcs.gla.ac.uk
* University of Glasgow - School of Computing Science
* http://www.gla.ac.uk
*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is TestSplitEmittedTerm.java.
*
* The Original Code is Copyright (C) 2004-2011 the University of Glasgow.
* All Rights Reserved.
*
* Contributor(s):
*   Richard McCreadie <richardm{a.}dcs.gla.ac.uk> (original author)
*   Craig Macdonald <craigm{a.}dcs.gla.ac.uk>
*/
package org.terrier.structures.indexing.singlepass.hadoop;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import junit.framework.TestCase;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;

import org.terrier.structures.indexing.singlepass.hadoop.SplitEmittedTerm.SETPartitioner;
import org.terrier.structures.indexing.singlepass.hadoop.SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm;
import org.terrier.structures.indexing.singlepass.hadoop.SplitEmittedTerm.SETRawComparatorTerm;
import org.terrier.structures.indexing.singlepass.hadoop.SplitEmittedTerm.SETRawComparatorTermSplitFlush;

/** Tests for SplitEmittedTerm, including the Comparators and Partitioners */
@SuppressWarnings("deprecation")
public class TestSplitEmittedTerm extends TestCase {
 
 
  public void testMethods() throws Exception
  {
    SplitEmittedTerm t1 = new SplitEmittedTerm("t1", 10, 34);
    assertEquals("t1", t1.getTerm());
    assertEquals(10, t1.getSplitno());
    assertEquals(34, t1.getFlushno());
   
    t1.setFlushno(11);
    assertEquals(10, t1.getSplitno());
    assertEquals(11, t1.getFlushno());
   
    t1.setSplitno(5);
    assertEquals(5, t1.getSplitno());
    assertEquals(11, t1.getFlushno());
   
    t1.setTerm("t2");
    assertEquals("t2", t1.getTerm());
  }
 
  private void checkWritable(final String t, final int split, final int flush) throws Exception
  {
    SplitEmittedTerm t1 = new SplitEmittedTerm(t, split, flush);
    byte[] b = toBytes(t1);
   
    SplitEmittedTerm t2 = new SplitEmittedTerm();
    t2.readFields(new DataInputStream(new ByteArrayInputStream(b)));
    assertTrue(t1.equals(t2));
    assertTrue(t2.equals(t1));
    assertEquals(t, t2.getTerm());
    assertEquals(split, t2.getSplitno());
    assertEquals(flush, t2.getFlushno());
  }
 
  public void testWritable() throws Exception
  {
    checkWritable("t1", 10, 34);
    checkWritable("t1", Integer.MAX_VALUE, Integer.MAX_VALUE);
  }
 
  private byte[] toBytes(Writable w) throws Exception
  {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);
    w.write(dos);
    return baos.toByteArray();
  }
 
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
      value="DM_STRING_CTOR",
      justification="Check String.equals is used, not ==")
  private void checkEqualityTerm(String t, int split, int flush) throws Exception
  {
    SplitEmittedTerm t1 = new SplitEmittedTerm(t, split, flush);
    SETRawComparatorTerm compare = new SETRawComparatorTerm();
    SETRawComparatorTermSplitFlush compare2 = new SETRawComparatorTermSplitFlush();
    assertEquals(0, t1.compareTo(t1));
    assertTrue(t1.equals(t1));
    assertEquals(0, compare.compare(t1, t1));
    assertEquals(0, compare2.compare(t1, t1));
    byte[] t1w = toBytes(t1);
    assertEquals(0, compare.compare(t1w, 0, t1w.length, t1w, 0, t1w.length));
    assertEquals(0, compare2.compare(t1w, 0, t1w.length, t1w, 0, t1w.length));
   
    SplitEmittedTerm t1a = new SplitEmittedTerm(new String(t), split, flush);
    assertEquals(0, t1.compareTo(t1a));
    assertEquals(0, t1a.compareTo(t1));
    assertTrue(t1.equals(t1a));
    assertTrue(t1a.equals(t1));
    assertEquals(0, compare.compare(t1, t1a));
    assertEquals(0, compare.compare(t1a, t1));
    assertEquals(0, compare2.compare(t1, t1a));
    assertEquals(0, compare2.compare(t1a, t1));
  }
 
  public void testEqualityTerm() throws Exception
  {
    checkEqualityTerm("t1", 0, 0);
    checkEqualityTerm("t1", Integer.MAX_VALUE, Integer.MAX_VALUE);   
  }
 
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
        value="DM_STRING_CTOR",
        justification="Check String.equals is used, not ==")
  private void checkEqualityTermSplit(String t, int split1, int split2, int flush) throws Exception
  {
    SplitEmittedTerm t1 = new SplitEmittedTerm(t, split1, flush);
    SplitEmittedTerm t2 = new SplitEmittedTerm(new String(t), split2, flush);
    SETRawComparatorTerm compare = new SETRawComparatorTerm();
    SETRawComparatorTermSplitFlush compare2 = new SETRawComparatorTermSplitFlush();
   
    assertEquals(0, t1.compareTo(t1));
         
    assertFalse(t1.equals(t2));
    assertEquals(0, compare.compare(t1, t2));
    assertTrue(compare2.compare(t1, t2) < 0);
   
    byte[] t1w = toBytes(t1);
    byte[] t2w = toBytes(t2);
    assertEquals(0, compare.compare(t1w, 0, t1w.length, t2w, 0, t2w.length));
    assertTrue("Comparing t1 to t2 as bytes", compare2.compare(t1w, 0, t1w.length, t2w, 0, t2w.length)< 0);
  }
 
  public void testEqualityTermSplit() throws Exception
  {
    checkEqualityTermSplit("t1", 0, 1, 0);
    checkEqualityTermSplit("t1", Integer.MAX_VALUE -1, Integer.MAX_VALUE, Integer.MAX_VALUE);   
  }
 
  private void compareTerm(SplitEmittedTerm t1, SplitEmittedTerm t2) throws Exception
  {
    SETRawComparatorTerm compare = new SETRawComparatorTerm();
    //check for inequality of each pair
    assertFalse(t1.equals(t2));
    assertFalse(t2.equals(t1));
         
    assertTrue(t1.compareTo(t2) < 0);
    assertTrue(t2.compareTo(t1) > 0);
    assertTrue(compare.compare(t1, t2) < 0);
    assertTrue(compare.compare(t2, t1) > 0);
   
    SETRawComparatorTermSplitFlush compare2 = new SETRawComparatorTermSplitFlush();
    assertTrue(compare2.compare(t1, t2) < 0);
    assertTrue(compare2.compare(t2, t1) > 0);
    byte[] t1w = toBytes(t1);
    byte[] t2w = toBytes(t2);
    assertTrue(compare.compare(t1w, 0, t1w.length, t2w, 0, t2w.length)< 0);
    assertTrue("Comparing t1 to t2 as bytes", compare2.compare(t1w, 0, t1w.length, t2w, 0, t2w.length) < 0);
  }

  public void testCompareTerm() throws Exception
  {   
    SplitEmittedTerm t1 = new SplitEmittedTerm("t1", 0, 0);
    SplitEmittedTerm t2 = new SplitEmittedTerm("t2", 0, 0);
    compareTerm(t1, t2);
 
    t1 = new SplitEmittedTerm("t1", Integer.MAX_VALUE, Integer.MAX_VALUE);
    t2 = new SplitEmittedTerm("t2", Integer.MAX_VALUE, Integer.MAX_VALUE);
    compareTerm(t1, t2);
  }
 
  private void compareTermSplit(SplitEmittedTerm t1, SplitEmittedTerm t2) throws Exception
  {
    SETRawComparatorTerm compare = new SETRawComparatorTerm();
    //check for inequality of each pair     
    assertFalse(t1.equals(t2));
    assertFalse(t2.equals(t1));
         
    assertTrue(t1.compareTo(t2) < 0);
    assertTrue(t2.compareTo(t1) > 0);
    assertEquals(0, compare.compare(t1, t2));
    assertEquals(0, compare.compare(t2, t1));
   
    SETRawComparatorTermSplitFlush compare2 = new SETRawComparatorTermSplitFlush();
    assertTrue(compare2.compare(t1, t2) < 0);
    assertTrue(compare2.compare(t2, t1) > 0);
    byte[] t1w = toBytes(t1);
    byte[] t2w = toBytes(t2);
    assertEquals(0, compare.compare(t1w, 0, t1w.length, t2w, 0, t2w.length));
    assertEquals(0, compare.compare(t2w, 0, t2w.length, t1w, 0, t1w.length));
    assertTrue(compare2.compare(t1w, 0, t1w.length, t2w, 0, t2w.length) < 0);
    assertTrue(compare2.compare(t2w, 0, t2w.length, t1w, 0, t1w.length) > 0);
  }
 
  public void testCompareTermSplit() throws Exception
  {
   
    SplitEmittedTerm t1 = new SplitEmittedTerm("t1", 0, 0);
    SplitEmittedTerm t2 = new SplitEmittedTerm("t1", 1, 0);
    compareTermSplit(t1, t2);
   
    t1 = new SplitEmittedTerm("t1", Integer.MAX_VALUE-1, 0);
    t2 = new SplitEmittedTerm("t1", Integer.MAX_VALUE, 0);
    compareTermSplit(t1, t2);
   
  }
 
  static final int sign(int a)
  {
    if (a < 0)
      return -1;
    if (a > 0)
      return 1;
    return 0;
  }
 
  private void compareTermFlush(SplitEmittedTerm t1, SplitEmittedTerm t2) throws Exception
  {
    SETRawComparatorTerm compare = new SETRawComparatorTerm();
    //check for inequality of each pair     
    assertFalse(t1.equals(t2));
    assertFalse(t2.equals(t1));
         
    assertTrue(t1.compareTo(t2) < 0);
    assertTrue(t2.compareTo(t1) > 0);
    assertEquals(sign(t1.getTerm().compareTo(t2.getTerm())), sign(compare.compare(t1, t2)));
    assertEquals(sign(t2.getTerm().compareTo(t1.getTerm())), sign(compare.compare(t2, t1)));
   
    SETRawComparatorTermSplitFlush compare2 = new SETRawComparatorTermSplitFlush();
    assertTrue(compare2.compare(t1, t2) < 0);
    assertTrue(compare2.compare(t2, t1) > 0);
    byte[] t1w = toBytes(t1);
    byte[] t2w = toBytes(t2);
    assertEquals(sign(t1.getTerm().compareTo(t2.getTerm())), sign(compare.compare(t1w, 0, t1w.length, t2w, 0, t2w.length)));
    assertEquals(sign(t2.getTerm().compareTo(t1.getTerm())), sign(compare.compare(t2w, 0, t2w.length, t1w, 0, t1w.length)));
    assertTrue(compare2.compare(t1w, 0, t1w.length, t2w, 0, t2w.length) < 0);
    assertTrue(compare2.compare(t2w, 0, t2w.length, t1w, 0, t1w.length) > 0);
  }
 
  public void testCompareTermFlush() throws Exception
  {   
    SplitEmittedTerm t1,t2;
    t1 = new SplitEmittedTerm("t1", 0, 0);
    t2 = new SplitEmittedTerm("t1", 0, 1);
    compareTermFlush(t1, t2);
   
    t1 = new SplitEmittedTerm(".", 0, 0);
    t2 = new SplitEmittedTerm("0", 0, 0);
    compareTermFlush(t1, t2);
   
    t1 = new SplitEmittedTerm("0", 0, 0);
    t2 = new SplitEmittedTerm("\\", 0, 0);   
    compareTermFlush(t1, t2);
   
    t1 = new SplitEmittedTerm("t1", 0, Integer.MAX_VALUE -1);
    t2 = new SplitEmittedTerm("t1", 0, Integer.MAX_VALUE );
    compareTermFlush(t1, t2);
  }
 
  /* Test cases for SETPartitionerLowercaseAlphaTerm */
  public void testSETPLAT() throws Exception
  {
    final SETPartitionerLowercaseAlphaTerm p = new SETPartitionerLowercaseAlphaTerm();
    //single partition
    assertEquals(0, p.calculatePartition('0', 1));
    assertEquals(0, p.calculatePartition('9', 1));
    assertEquals(0, p.calculatePartition('-', 1));
    assertEquals(0, p.calculatePartition('a', 1));
    assertEquals(0, p.calculatePartition('z', 1));
    assertEquals(0, p.calculatePartition('}', 1));
    //two partitions
    assertEquals(0, p.calculatePartition('(', 2));
    assertEquals(0, p.calculatePartition('.', 2));
    assertEquals(0, p.calculatePartition(')', 2));
    assertEquals(0, p.calculatePartition('\\', 2));
    assertEquals(0, p.calculatePartition('/', 2));
   
    assertEquals(0, p.calculatePartition('0', 2));
    assertEquals(0, p.calculatePartition('9', 2));
    assertEquals(0, p.calculatePartition('-', 2));
    assertEquals(0, p.calculatePartition('a', 2));
    assertEquals(0, p.calculatePartition('l', 2));
    assertEquals(0, p.calculatePartition('m', 2));
    assertEquals(1, p.calculatePartition('n', 2));
    assertEquals(1, p.calculatePartition('o', 2));
    assertEquals(1, p.calculatePartition('z', 2));
    assertEquals(1, p.calculatePartition('}', 2));
    //(all upper case goto partition 0)
    assertEquals(0, p.calculatePartition('M', 2));
    assertEquals(0, p.calculatePartition('N', 2));
    assertEquals(0, p.calculatePartition('O', 2));
   
    //three partitions
    assertEquals(0, p.calculatePartition('0', 3));
    assertEquals(0, p.calculatePartition('9', 3));
    assertEquals(0, p.calculatePartition('-', 3));
    assertEquals(0, p.calculatePartition('a', 3));
    assertEquals(0, p.calculatePartition('h', 3));
    assertEquals(0, p.calculatePartition('i', 3));
    assertEquals(1, p.calculatePartition('j', 3));
    assertEquals(1, p.calculatePartition('r', 3));
    assertEquals(2, p.calculatePartition('s', 3));
    assertEquals(2, p.calculatePartition('t', 3));
    assertEquals(2, p.calculatePartition('u', 3));
    assertEquals(2, p.calculatePartition('z', 3));
    assertEquals(2, p.calculatePartition('}', 3));
   
    //26 partitions
    assertEquals(0, p.calculatePartition('0', 26));
    assertEquals(0, p.calculatePartition('9', 26));
    assertEquals(0, p.calculatePartition('-', 26));
    assertEquals(0, p.calculatePartition('a', 26));
    assertEquals(1, p.calculatePartition('b', 26));
    assertEquals(2, p.calculatePartition('c', 26));
    assertEquals(3, p.calculatePartition('d', 26));
    assertEquals(4, p.calculatePartition('e', 26));
    assertEquals(5, p.calculatePartition('f', 26));
    assertEquals(6, p.calculatePartition('g', 26));
    assertEquals(7, p.calculatePartition('h', 26));
    assertEquals(8, p.calculatePartition('i', 26));
    assertEquals(9, p.calculatePartition('j', 26));
    assertEquals(10, p.calculatePartition('k', 26));
    assertEquals(11, p.calculatePartition('l', 26));
    assertEquals(12, p.calculatePartition('m', 26));
    assertEquals(13, p.calculatePartition('n', 26));
    assertEquals(14, p.calculatePartition('o', 26));
    assertEquals(15, p.calculatePartition('p', 26));
    assertEquals(16, p.calculatePartition('q', 26));
    assertEquals(17, p.calculatePartition('r', 26));
    assertEquals(18, p.calculatePartition('s', 26));
    assertEquals(19, p.calculatePartition('t', 26));
    assertEquals(20, p.calculatePartition('u', 26));
    assertEquals(21, p.calculatePartition('v', 26));
    assertEquals(22, p.calculatePartition('w', 26));
    assertEquals(23, p.calculatePartition('x', 26));
    assertEquals(24, p.calculatePartition('y', 26));
    assertEquals(25, p.calculatePartition('z', 26));
  }
 
  /* Test cases for SETPartitioner */
 
  /** single map, single reducer */
  public void testSMSRCalculatePartition() throws Exception
  {
    final JobConf j = new JobConf();
    j.setNumMapTasks(1);
    final SETPartitioner p = new SETPartitioner();
    p.configure(j);
    assertEquals(0, p.calculatePartition(0, 1));
  }
 
  /** multiple map, single reducer */
  public void testMMSRCalculatePartition() throws Exception
  {
    final JobConf j = new JobConf();
    final int maptasks = 20;
    j.setNumMapTasks(maptasks);
    final SETPartitioner p = new SETPartitioner();
    p.configure(j);
    assertEquals(0, p.calculatePartition(0, 1));
    assertEquals(0, p.calculatePartition(19, 1));
    assertEquals(0, p.calculatePartition(10, 1));
   
   
  }
 
  /** multiple map, multiple reducer */
  public void testMMMRCalculatePartition() throws Exception
  {
    final JobConf j = new JobConf();
    final int maptasks = 20;
    j.setNumMapTasks(maptasks);
    final SETPartitioner p = new SETPartitioner();
    p.configure(j);
   
    assertEquals(0, p.calculatePartition(0, 2));
    assertEquals(0, p.calculatePartition(1, 2));
    assertEquals(0, p.calculatePartition(9, 2));
    assertEquals(1, p.calculatePartition(10, 2));
    assertEquals(1, p.calculatePartition(19, 2));
  }
}
TOP

Related Classes of org.terrier.structures.indexing.singlepass.hadoop.TestSplitEmittedTerm

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.